# Year labels that appear in the raw survey CSV file names under ./data/.
years <- c(103, 104, 105, 106)

# Load each year's CSV, keep the analysis columns, tag rows with an id and the
# year, and keep only respondents whose purp1 or purp2 equals 1.
# The per-year frames are collected in a preallocated list and bound once at
# the end; growing a data frame with rbind() inside the loop re-copies the
# accumulated rows on every iteration.
year.dfs <- vector("list", length(years))
for (i in seq_along(years)) {
  message(years[i])
  file.name <- paste0("./data/", years[i], "年來臺旅客消費及動向調查(原始資料).csv")
  df <- read.csv(file.name, fileEncoding = "BIG5")
  df <- df %>%
    select(
      stay, freq, purp1, purp2, type, prepay, pmoney, pdollar, airf,
      pit1, pit2, pit3, pit4, pit5,
      money, dollar,
      money1, p1, money2, p2, money3, p3, money4, p4, money5, p5, money6, p6,
      m601, m602, m603, m604, m605, m606, m607, m608, m609, m610,
      act01, act02, act03, act04, act05, act06, act07, act08, act09, act10,
      act11, act12, act13, act14, act15, act16, act17, act18, act19,
      nation, age, income, educ, occup, gender
    ) %>%
    # The id is built BEFORE filtering, so it encodes the row's position in
    # the original file (year followed by a zero-padded row number).
    mutate(id = sprintf("%d%04d", years[i], as.numeric(rownames(.)))) %>%
    filter(purp1 == 1 | purp2 == 1)
  df$year <- years[i]
  year.dfs[[i]] <- df
}

# Start from an empty data frame so the result is still a data frame even when
# `years` is empty.
all.df <- do.call(rbind, c(list(data.frame()), year.dfs))
## 103
## 104
## 105
## 106
# Cache the combined data on disk, then reload it under the name used by the
# rest of the script.
saveRDS(object = all.df, file = "alldf.rds")
alldf <- readRDS(file = "./alldf.rds")

# Report the number of rows and columns.
dim(alldf)
## [1] 23989 65
# Work on a tibble copy; as_tibble() is the supported replacement for the
# deprecated as.tibble().
alltb <- as_tibble(alldf)

# Put the identifier columns first.
alltb <- alltb %>%
  select(id, year, everything())
# alltb

# Convert every column to numeric, one column at a time. The raw data uses "."
# as its missing-value marker, so that is turned into NA before parsing.
# Going column-wise (instead of apply() over the whole table) avoids the
# detour through a character matrix, where numeric columns are passed through
# format() and may be rounded before being parsed back.
cols <- seq_along(alltb)
alltb[cols] <- lapply(alltb[cols], function(x) {
  x <- as.character(x)
  x[x %in% "."] <- NA
  as.numeric(x)
})
alltb
# glimpse(alltb)
# NA & double
# Summarise pmoney within each pdollar group: row count, log of the group mean,
# and standard deviation. No na.rm is used, so a group containing any missing
# pmoney yields NA for its mean and sd.
PreSum <- summarise(
  group_by(alltb, pdollar),
  n = n(),
  mean_pmoney = log(mean(pmoney)),
  sd = sd(pmoney)
)
PreSum

# Scatter plot of the log mean of pmoney against the pdollar code.
ggplot(PreSum, aes(x = pdollar, y = mean_pmoney)) +
  geom_point() +
  labs(
    x = "Dollar(Category)",
    y = "Mean_Pmoney(log)",
    title = "Before Fixing XR"
  )
## Warning: Removed 1 rows containing missing values (geom_point).